###############################################
# Replication Code for                        #
# "Why Process Matters for Causal Inference"  #
# by Adam N. Glynn and Kevin M. Quinn         #
############################################### 


get.ATC <- function(y, ypred.treated, x){
  control.indic <- x == 0
  missing.indic <- is.na(ypred.treated)
  y.sub <- y[control.indic & !missing.indic]
  ypred.treated.sub <- ypred.treated[control.indic & !missing.indic]
  ATC <- mean(ypred.treated.sub - y.sub)
  return(ATC)
}


create.output.column <- function(outmat, glm.out, ATC.hat, ATC.interval,
                                 col.index){
  beta <- coef(glm.out)
  ci <- confint(glm.out)
  for (var in names(beta)){
    outmat[var, col.index] <- as.character(round(beta[var], 3))
    next.ind <- which(rownames(outmat) == var) + 1
    ci.string <- paste("[", round(ci[var,1], 3), ", ",
                       round(ci[var,2], 3), "]", sep="")
    outmat[next.ind, col.index] <- ci.string
  }
  outmat["ATC", col.index] <- ATC.hat
  next.ind <- which(rownames(outmat) == "ATC") + 1
  outmat[next.ind, col.index] <- ATC.interval
  outmat["N", col.index] <- glm.out$df.null + 1
  return(outmat)
}




## get the CPS data and process it
load("CPSvote2004.RData")


VOTECPSnd <- subset(VOTECPS, GESTFIPS != 38)

EDR <- rep(0,dim(VOTECPSnd)[1])
EDR[VOTECPSnd$GESTFIPS == 16] <- 1 # Idaho
EDR[VOTECPSnd$GESTFIPS == 27 ] <- 1 #Maine
EDR[VOTECPSnd$GESTFIPS == 26 ] <- 1 # Minnesota
EDR[VOTECPSnd$GESTFIPS == 33] <- 1 # New Hampshire
EDR[VOTECPSnd$GESTFIPS == 55] <- 1 # Wisconsin
EDR[VOTECPSnd$GESTFIPS == 56] <- 1 # Wyoming

VOTECPSnd <- data.frame(VOTECPSnd,EDR)
VOTECPSnd <- subset(VOTECPSnd,VOTE == 1 | VOTE == 2)
VOTECPSnd <- subset(VOTECPSnd,REG == 1 | REG == 2)

VOTECPSnd$REG[VOTECPSnd$REG ==2] <- 0
VOTECPSnd$VOTE[VOTECPSnd$VOTE==2] <- 0 

## get just blacks
VOTECPSndB <- subset(VOTECPSnd,PTDTRACE == 2)

##x <- VOTECPSndB$EDR
##m <- VOTECPSndB$REG
##y <- VOTECPSndB$VOTE

mydata <- data.frame(vote=VOTECPSndB$VOTE,
                     edr=VOTECPSndB$EDR,
                     famincome=VOTECPSndB$HUFAMINC,
                     age=VOTECPSndB$PRTAGE,
                     sex=VOTECPSndB$PESEX,
                     educ=VOTECPSndB$PEEDUCA,
                     hisp=VOTECPSndB$PEHSPNON)

mydata$age[mydata$age > 80] <- NA ## get rid of values above what the codebook
                                  ## says is the max

mydata$famincome[mydata$famincome < 1 ] <- NA ## get rid of values below what
                                              ## the codebook says is the min



#mydata <- na.omit(mydata) ## listwise deletion (useful for matching code)


glm.out <- glm(vote ~ edr*famincome*sex + edr*age*sex + edr*educ*sex,
               family="binomial", data=mydata)

print(summary(glm.out))


data.treated <- mydata
data.treated$edr <- 1

ATC.hat <- get.ATC(mydata$vote,
               predict(glm.out, newdata=data.treated, type="response"),
               mydata$edr)

## bootstrap SEs
set.seed(72303)
B <- 1001
n <- nrow(mydata)
ATC.samps <- rep(NA, B)
for (i in 1:B){
  bs.inds <- sample(1:n, size=n, replace=TRUE)
  bs.data <- mydata[bs.inds,]
  glm.bs.out <- glm(vote ~ edr*famincome*sex + edr*age*sex + edr*educ*sex,
                    family="binomial", data=bs.data)

  ATC.samps[i] <- get.ATC(mydata$vote,
                          predict(glm.bs.out, newdata=data.treated,
                                  type="response"),
                          mydata$edr)
  
}

cat("*******************************************\n")
cat("Estimated ATC:   ", round(ATC.hat, 3), "\n")
cat("Standard Error:  ", round(sd(ATC.samps), 3), "\n")
cat("95% CI:          ", "[", round(quantile(ATC.samps, .025), 3),
    ", ", round(quantile(ATC.samps, .975), 3), "]\n", sep="")
cat("*******************************************\n")

ATC.hat <- as.character(round(ATC.hat, 3))
ATC.interval <- paste("[", round(quantile(ATC.samps, .025), 3),
    ", ", round(quantile(ATC.samps, .975), 3), "]", sep="")





outmat <- matrix(NA, nrow=(length(coef(glm.out))*3+4), ncol=9)
counter <- 1
rownames(outmat) <- 1:nrow(outmat)
for (i in 1:length(coef(glm.out))){
  rownames(outmat)[counter] <- names(coef(glm.out))[i]
  counter <- counter + 3
}
rownames(outmat)[counter] <- "ATC"
rownames(outmat)[counter+3] <- "N"

outmat <- create.output.column(outmat, glm.out, ATC.hat, ATC.interval, 1)









## no 3-ways
myform <- vote ~ edr*famincome + famincome*sex + edr*sex +
               edr*age + age*sex + edr*educ + educ*sex
glm.out <- glm(myform,
               family="binomial", data=mydata)


ATC.samps <- rep(NA, B)
for (i in 1:B){
  bs.inds <- sample(1:n, size=n, replace=TRUE)
  bs.data <- mydata[bs.inds,]
  glm.bs.out <- glm(myform,
                    family="binomial", data=bs.data)

  ATC.samps[i] <- get.ATC(mydata$vote,
                          predict(glm.bs.out, newdata=data.treated,
                                  type="response"),
                          mydata$edr)
  
}

ATC.hat <- get.ATC(mydata$vote,
                   predict(glm.out, newdata=data.treated, type="response"),
                   mydata$edr)
ATC.hat <- as.character(round(ATC.hat, 3))
ATC.interval <- paste("[", round(quantile(ATC.samps, .025), 3),
    ", ", round(quantile(ATC.samps, .975), 3), "]", sep="")


outmat <- create.output.column(outmat, glm.out, ATC.hat, ATC.interval, 2)






## no sex interaction
myform <- vote ~ edr*famincome + edr*sex + edr*age  + edr*educ
glm.out <- glm(myform,
               family="binomial", data=mydata)


ATC.samps <- rep(NA, B)
for (i in 1:B){
  bs.inds <- sample(1:n, size=n, replace=TRUE)
  bs.data <- mydata[bs.inds,]
  glm.bs.out <- glm(myform,
                    family="binomial", data=bs.data)

  ATC.samps[i] <- get.ATC(mydata$vote,
                          predict(glm.bs.out, newdata=data.treated,
                                  type="response"),
                          mydata$edr)
  
}

ATC.hat <- get.ATC(mydata$vote,
                   predict(glm.out, newdata=data.treated, type="response"),
                   mydata$edr)
ATC.hat <- as.character(round(ATC.hat, 3))
ATC.interval <- paste("[", round(quantile(ATC.samps, .025), 3),
    ", ", round(quantile(ATC.samps, .975), 3), "]", sep="")


outmat <- create.output.column(outmat, glm.out, ATC.hat, ATC.interval, 3)





## no interaction
myform <- vote ~ edr + famincome + sex + age  + educ
glm.out <- glm(myform,
               family="binomial", data=mydata)


ATC.samps <- rep(NA, B)
for (i in 1:B){
  bs.inds <- sample(1:n, size=n, replace=TRUE)
  bs.data <- mydata[bs.inds,]
  glm.bs.out <- glm(myform,
                    family="binomial", data=bs.data)

  ATC.samps[i] <- get.ATC(mydata$vote,
                          predict(glm.bs.out, newdata=data.treated,
                                  type="response"),
                          mydata$edr)
  
}

ATC.hat <- get.ATC(mydata$vote,
                   predict(glm.out, newdata=data.treated, type="response"),
                   mydata$edr)
ATC.hat <- as.character(round(ATC.hat, 3))
ATC.interval <- paste("[", round(quantile(ATC.samps, .025), 3),
    ", ", round(quantile(ATC.samps, .975), 3), "]", sep="")


outmat <- create.output.column(outmat, glm.out, ATC.hat, ATC.interval, 4)








## no educ
myform <- vote ~ edr + famincome + sex + age 
glm.out <- glm(myform,
               family="binomial", data=mydata)


ATC.samps <- rep(NA, B)
for (i in 1:B){
  bs.inds <- sample(1:n, size=n, replace=TRUE)
  bs.data <- mydata[bs.inds,]
  glm.bs.out <- glm(myform,
                    family="binomial", data=bs.data)

  ATC.samps[i] <- get.ATC(mydata$vote,
                          predict(glm.bs.out, newdata=data.treated,
                                  type="response"),
                          mydata$edr)
  
}

ATC.hat <- get.ATC(mydata$vote,
                   predict(glm.out, newdata=data.treated, type="response"),
                   mydata$edr)
ATC.hat <- as.character(round(ATC.hat, 3))
ATC.interval <- paste("[", round(quantile(ATC.samps, .025), 3),
    ", ", round(quantile(ATC.samps, .975), 3), "]", sep="")


outmat <- create.output.column(outmat, glm.out, ATC.hat, ATC.interval, 5)




## no age
myform <- vote ~ edr + famincome + sex  + educ
glm.out <- glm(myform,
               family="binomial", data=mydata)


ATC.samps <- rep(NA, B)
for (i in 1:B){
  bs.inds <- sample(1:n, size=n, replace=TRUE)
  bs.data <- mydata[bs.inds,]
  glm.bs.out <- glm(myform,
                    family="binomial", data=bs.data)

  ATC.samps[i] <- get.ATC(mydata$vote,
                          predict(glm.bs.out, newdata=data.treated,
                                  type="response"),
                          mydata$edr)
  
}

ATC.hat <- get.ATC(mydata$vote,
                   predict(glm.out, newdata=data.treated, type="response"),
                   mydata$edr)
ATC.hat <- as.character(round(ATC.hat, 3))
ATC.interval <- paste("[", round(quantile(ATC.samps, .025), 3),
    ", ", round(quantile(ATC.samps, .975), 3), "]", sep="")


outmat <- create.output.column(outmat, glm.out, ATC.hat, ATC.interval, 6)





## no sex
myform <- vote ~ edr + famincome  + age  + educ
glm.out <- glm(myform,
               family="binomial", data=mydata)


ATC.samps <- rep(NA, B)
for (i in 1:B){
  bs.inds <- sample(1:n, size=n, replace=TRUE)
  bs.data <- mydata[bs.inds,]
  glm.bs.out <- glm(myform,
                    family="binomial", data=bs.data)

  ATC.samps[i] <- get.ATC(mydata$vote,
                          predict(glm.bs.out, newdata=data.treated,
                                  type="response"),
                          mydata$edr)
  
}

ATC.hat <- get.ATC(mydata$vote,
                   predict(glm.out, newdata=data.treated, type="response"),
                   mydata$edr)
ATC.hat <- as.character(round(ATC.hat, 3))
ATC.interval <- paste("[", round(quantile(ATC.samps, .025), 3),
    ", ", round(quantile(ATC.samps, .975), 3), "]", sep="")


outmat <- create.output.column(outmat, glm.out, ATC.hat, ATC.interval, 7)





## no faminc
myform <- vote ~ edr + sex + age  + educ
glm.out <- glm(myform,
               family="binomial", data=mydata)


ATC.samps <- rep(NA, B)
for (i in 1:B){
  bs.inds <- sample(1:n, size=n, replace=TRUE)
  bs.data <- mydata[bs.inds,]
  glm.bs.out <- glm(myform,
                    family="binomial", data=bs.data)

  ATC.samps[i] <- get.ATC(mydata$vote,
                          predict(glm.bs.out, newdata=data.treated,
                                  type="response"),
                          mydata$edr)
  
}

ATC.hat <- get.ATC(mydata$vote,
                   predict(glm.out, newdata=data.treated, type="response"),
                   mydata$edr)
ATC.hat <- as.character(round(ATC.hat, 3))
ATC.interval <- paste("[", round(quantile(ATC.samps, .025), 3),
    ", ", round(quantile(ATC.samps, .975), 3), "]", sep="")


outmat <- create.output.column(outmat, glm.out, ATC.hat, ATC.interval, 8)




## just edr
myform <- vote ~ edr
glm.out <- glm(myform,
               family="binomial", data=mydata)


ATC.samps <- rep(NA, B)
for (i in 1:B){
  bs.inds <- sample(1:n, size=n, replace=TRUE)
  bs.data <- mydata[bs.inds,]
  glm.bs.out <- glm(myform,
                    family="binomial", data=bs.data)

  ATC.samps[i] <- get.ATC(mydata$vote,
                          predict(glm.bs.out, newdata=data.treated,
                                  type="response"),
                          mydata$edr)
  
}

ATC.hat <- get.ATC(mydata$vote,
                   predict(glm.out, newdata=data.treated, type="response"),
                   mydata$edr)
ATC.hat <- as.character(round(ATC.hat, 3))
ATC.interval <- paste("[", round(quantile(ATC.samps, .025), 3),
    ", ", round(quantile(ATC.samps, .975), 3), "]", sep="")


outmat <- create.output.column(outmat, glm.out, ATC.hat, ATC.interval, 9)



save.image(file="CPS-regression.Rdata")


## Table 2 in the article
library(xtable)
print(xtable(outmat))


